Get Data

We’ll load the raw data from all 8 .xls files. Each file shows the participant’s name, group, language, media, and x/y eye gaze coordinates. Here’s a quick glimpse of the data and how it’s structured after I’ve processed and cleaned it up.

# Import packages we'll need.
library(tidyverse)
library(stringr)
library(lme4)
library(lmerTest)
library(png)
library(grid)
# # Gather up the files
# files <- list.files(pattern = "\\.csv",path="../Adult Data/rawdata")
# files <- str_c("rawdata/",files)
# rawdata <- do.call("rbind", lapply(files, read_csv))
# 
# # Clean Up
# rawdataoriginal <- rawdata
# rawdata <- rawdataoriginal
# rawdata <- rawdata %>%
#   rename(participant = ParticipantName,
#          group = "[Group]Value",
#          language = "[Language]Value",
#          media = MediaName,
#          x = "GazePointX (MCSpx)",
#          y = "GazePointY (MCSpx)",
#          gazeindex = GazePointIndex) %>%
#   select(participant,group,language,media,x,y,gazeindex) %>%
#   add_column(story=NA,direction=NA,maingroup=NA) %>%
#   mutate(story = case_when(
#     str_detect(media,"bears") ~ "bears",
#     str_detect(media,"cinderella") ~ "cinderella",   
#     str_detect(media,"midas") ~ "midas",
#     str_detect(media,"redridinghood") ~ "redridinghood")) %>%
#   mutate(direction = case_when(
#     str_detect(media,"FW") ~ "forward",
#     str_detect(media,"ER") ~ "reversed")) %>%
#   mutate(media = str_c(story,direction, sep="_")) %>%
#   mutate(language = case_when(
#     str_detect(language,"EarlyASL") ~ "Early",
#     str_detect(language,"LateASL") ~ "Late",
#     str_detect(language,"NoviceASL_Trained") ~ "Trained",
#     str_detect(language,"NoviceASL") ~ "Novice")) %>%
#   mutate(maingroup = str_c(group,language, sep="")) %>%
#   filter(!is.na(maingroup))
# glimpse(rawdata)

Now that it’s in the right format…it’s easy to get what we need! :-D

Analysis

First, let’s trim each participant’s data, getting rid of the first 30 samples (0.5 secs). Then we’ll get the the mean x and y coordinate for each story for each participant.

#write_csv(rawdata,"adultviewingspaceraw.csv")
rawdata <- read_csv("../Adult Data/rawdata/adultviewingspaceraw.csv")
Parsed with column specification:
cols(
  participant = col_character(),
  group = col_character(),
  language = col_character(),
  media = col_character(),
  x = col_integer(),
  y = col_integer(),
  gazeindex = col_integer(),
  story = col_character(),
  direction = col_character(),
  maingroup = col_character()
)
rawdata <- rawdata %>%
  arrange(participant,media,gazeindex) %>%
  group_by(participant,media) %>%
  slice(30:n())
means <- rawdata %>%
  group_by(participant,media) %>%
  summarise(x = mean(x,na.rm=TRUE),
            y = mean(y,na.rm=TRUE))
head(means,10)

And I can get x or y plots of one participant across 4 stories. Let’s do Adam (me?). We’ll set the x and y limits to the whole width of the Tobii monitor (1600x1200). But because Tobii considers (0,0) to be the upper left corner (and not the bottom left corner), we also need to flip the y axis.

adam <- filter(rawdata,participant=="Adam") %>% mutate(y = y*-1)
ggplot(adam,aes(x=x,y=y,color=media)) + geom_point(size=0.1) + geom_path() + facet_wrap("media",ncol=2,nrow=2) + guides(color="none") + scale_x_continuous(limit=c(0,1600)) + scale_y_continuous(limit=c(-1200,0))

Cool, yeah?

Let’s try this again but let the x and y limits match the data. That will “zoom” in. We’ll also get rid of that weird right-side outlier in RRH. Neatooooo.

adam <- filter(adam,x<800)
ggplot(adam,aes(x=x,y=y,color=media)) + geom_point(size=0.1) + geom_path() + facet_wrap("media",ncol=2,nrow=2) + guides(color="none") 

IQR

Now let’s get the middle 50% (aka the IQR) of x and y for each participant’s story (we’ve already trimmed the first 30 samples). That should also take care of further weird outliers. And we are defining “viewing space” as the IQR of the x and y axis.

iqr <- rawdata %>%
  group_by(participant,media) %>%
  dplyr::summarize(xIQR = IQR(x,na.rm=TRUE),
                   yIQR = IQR(y,na.rm=TRUE),
                   xmed = median(x, na.rm=TRUE),
                   ymed = median(y, na.rm=TRUE))
head(iqr,10)

And check out the histograms:

iqr %>% 
  gather(axis,iqr,xIQR:yIQR) %>%
  ggplot(aes(x=iqr,fill=axis)) + geom_histogram() + facet_grid(axis~.)

There’s one weird outlier for xIQR. That’s Rebecca. Let’s look at her data.

rebecca <- filter(rawdata,participant=="Rebecca") %>% mutate(y = y*-1)
ggplot(rebecca,aes(x=x,y=y,color=media)) + geom_point(size=0.1) + geom_path() + facet_wrap("media",ncol=2,nrow=2) + guides(color="none") + xlim(0,1440) + ylim(-1080,0)

So we’ll remove Rebecca’s midas_forward story. Next, check the medians.

iqr %>% 
  gather(axis,med,xmed:ymed) %>%
  ggplot(aes(x=med,fill=axis)) + geom_histogram() + facet_grid(axis~.)

Looks great! But it also looks pretty interesting doesn’t it…Y median has a wider spread than X median. That would make sense.

Now we’re ready to do stats based on group, direction, etc.

rbc <- tribble(~participant, ~media, "Rebecca","midas_forward")
iqr <- iqr %>%
  ungroup() %>%
  anti_join(rbc, by=c("participant","media")) # for some reason filter wouldn't work
subjectinfo <- rawdata %>%
  select(participant,maingroup,group,language,media,story,direction) %>%
  distinct() %>%
  filter(!is.na(participant))
iqr <- iqr %>%
  left_join(subjectinfo,by=c("participant","media")) %>%
  filter(!is.na(maingroup))
iqr.gather <- iqr %>% gather(axis,value,xIQR:ymed)
iqr.iqr <- filter(iqr.gather,axis=="xIQR" | axis=="yIQR")
iqr.med <- filter(iqr.gather,axis=="xmed" | axis=="ymed")
ggplot(iqr.iqr,aes(x=maingroup,y=value,fill=direction)) + 
  geom_boxplot() + theme(axis.text.x=element_text(angle=45,hjust=1)) +
  facet_grid(.~axis)

And the median x and y position (this assumes all calibrations are correct):

ggplot(iqr.med,aes(x=maingroup,y=value,fill=direction)) + 
  geom_boxplot() + theme(axis.text.x=element_text(angle=45,hjust=1)) +
  facet_grid(.~axis)

First, does reversal have an effect on X IQR? We have random intercepts for each participant and media, and a random slope adjustment for reversed for each participant.

xiqr.reversal <- lmer(xIQR ~ direction + (direction|participant) + (1|media), data = iqr)
summary(xiqr.reversal)$coefficients
                   Estimate Std. Error       df   t value     Pr(>|t|)
(Intercept)       37.067311   4.297525 5.879626 8.6252701 0.0001491978
directionreversed  2.396059   5.922401 5.288700 0.4045755 0.7016363535

Welp. No. Y IQR?

yiqr.reversal <- lmer(yIQR ~ direction + (direction|participant) + (1|media), data = iqr)
summary(yiqr.reversal)$coefficients
                   Estimate Std. Error       df    t value     Pr(>|t|)
(Intercept)       46.043362   4.200520 6.970763 10.9613480 1.200581e-05
directionreversed  4.845994   5.370947 4.578010  0.9022605 4.118721e-01

No effect here either. Let’s try adding maingroups. X IQR first.

xiqr.group <- lmer(xIQR ~ direction * maingroup + (direction|participant) + (1|media), data = iqr)
summary(xiqr.group)$coefficients
                                            Estimate Std. Error         df     t value     Pr(>|t|)
(Intercept)                               34.6927610   4.977661   9.371920  6.96969146 5.346237e-05
directionreversed                          6.1771409   6.649067   7.438029  0.92902372 3.820371e-01
maingroupDeafLate                         -3.1765664   4.967385  78.798842 -0.63948467 5.243625e-01
maingroupHearingEarly                     -0.4499422   7.743459  73.051139 -0.05810609 9.538228e-01
maingroupHearingLate                       7.1504388   4.359923  75.858667  1.64003778 1.051379e-01
maingroupHearingNovice                     1.7218180   4.676980  74.285758  0.36814740 7.138103e-01
maingroupHearingTrained                    6.1822390   4.333229  74.566382  1.42670499 1.578388e-01
directionreversed:maingroupDeafLate       -5.3237257   5.606349 109.885316 -0.94958867 3.444051e-01
directionreversed:maingroupHearingEarly   -5.7646039   8.485478 106.947583 -0.67934934 4.983842e-01
directionreversed:maingroupHearingLate    -7.9499453   4.850258 110.002650 -1.63907694 1.040542e-01
directionreversed:maingroupHearingNovice  -4.5441561   5.174925 107.927462 -0.87811047 3.818344e-01
directionreversed:maingroupHearingTrained -2.8665334   4.835685 109.082262 -0.59278748 5.545503e-01

This means there is a significant difference of DeafEarly vs. HearingLate and HearingTrained on xIQR, but in the forward condition only. Basically, those two groups have a “wider” viewing space than other groups.

yiqr.group <- lmer(yIQR ~ direction * maingroup + (direction|participant) + (1|media), data = iqr)
summary(yiqr.group)$coefficients
                                            Estimate Std. Error         df     t value     Pr(>|t|)
(Intercept)                               43.6712994   5.694180  22.801965  7.66946239 9.299788e-08
directionreversed                          2.2511171   6.477516   9.643778  0.34752785 7.356598e-01
maingroupDeafLate                          0.5128043   8.350353  72.818563  0.06141110 9.512001e-01
maingroupHearingEarly                     -4.5089659  13.105397  67.201619 -0.34405412 7.318798e-01
maingroupHearingLate                       0.6217192   7.364000  69.503009  0.08442684 9.329598e-01
maingroupHearingNovice                     4.4728826   7.906578  68.286947  0.56571661 5.734407e-01
maingroupHearingTrained                    8.7840578   7.325681  68.444490  1.19907732 2.346327e-01
directionreversed:maingroupDeafLate       -6.0754371   8.026624 142.796568 -0.75691059 4.503505e-01
directionreversed:maingroupHearingEarly    1.0060946  12.144589 140.217132  0.08284304 9.340945e-01
directionreversed:maingroupHearingLate     4.4331711   6.951655 142.667852  0.63771449 5.246817e-01
directionreversed:maingroupHearingNovice   8.5502379   7.409162 140.930045  1.15400881 2.504498e-01
directionreversed:maingroupHearingTrained  5.8176819   6.927937 141.706193  0.83974235 4.024674e-01

No differences among groups or reversal effect for xIQR. Viewing space doesn’t get significantly taller or shorter.

Viewing Space Charts

I want to learn how to make rectangle plots so here we go. Using each participant’s four x and y medians and 4 x and y IQRs (one set for each story, for 4 stories). So I can get the logic and code down. Let’s assume all calibrations were correct. Here’s the chart for the whole media size of 1440x1080 (as reported in Tobii).

# In this order, we'll get a grand median by taking a participant's median across their 4 stories, than the median for forward and reverse across all participants. 
medians <- iqr %>%
  group_by(maingroup, participant,direction) %>%
  dplyr::summarize(xIQR = median(xIQR,na.rm=TRUE),
                   yIQR = median(yIQR,na.rm=TRUE),
                   xmed = median(xmed,na.rm=TRUE),
                   ymed = median(ymed,na.rm=TRUE)) %>%
  group_by(maingroup, direction) %>% 
  dplyr::summarize(xIQR = median(xIQR,na.rm=TRUE),
                   yIQR = median(yIQR,na.rm=TRUE),
                   x = median(xmed,na.rm=TRUE),
                   y = median(ymed,na.rm=TRUE))
medians <- medians %>%
  mutate(y = y*-1,
         xmin = x-(xIQR/2),
         xmax = x+(xIQR/2),
         ymin = y-(yIQR/2),
         ymax = y+(yIQR/2))
img <- readPNG("cindy.png")
g <- rasterGrob(img, interpolate=TRUE, width=unit(1,"npc"), height=unit(1,"npc")) 
ggplot(medians, aes(fill=direction,color=direction)) +
  annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_linedraw() + 
  scale_x_continuous(limits = c(0,1440), expand = c(0, 0)) +
  scale_y_continuous(limits = c(-1080,0), expand = c(0, 0)) +
  facet_wrap("maingroup")

# ggplot(iqr.global, aes(fill=direction,color=direction)) +
#   annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
#   geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
#   theme_minimal() + xlim(0,1440) + ylim(-1080,0) +
#   geom_hline(yintercept=-1080+885) +
#   geom_hline(yintercept=-1080+525) + 
#   annotate(geom="text", x = 300, y = -1080+555, label = "upper shoulder point") +
#   annotate(geom="point", x = 535, y = -1080+525) + 
#   annotate(geom="text", x = 535, y = -1080+910, label = "height line") + 
#   annotate(geom="rect", xmin = 535, xmax = 535+365, ymin = -525-551, ymax = -1080+525, fill="maroon", color="black", alpha=0.5) + 
#   annotate(geom="text", x = 700, y = -900, label = "torso")

Yayyy! It worked! A bit hard to see! Let’s zoom in.

# ggplot(iqr.global, aes(fill=direction,color=direction)) +
#   annotation_custom(g, xmin=0, xmax=1440, ymin=-1080, ymax=0) +
#   geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
#   theme_minimal() + xlim(600,800) + ylim(-500,-300)
ggplot(medians, aes(fill=direction,color=direction)) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_minimal() + xlim(677,743) + ylim(-421,-370)

#ggplot(iqr.global, aes(fill=direction,color=direction)) +
#  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1)

I have to figure out how to get Cindy to zoom in - or crop a zoomed in version of Cindy manually and use that.

Viewing Space Charts for Individuals

Now let’s see the variation in viewing spaces for all our individuals. Should be fun.

iqr.individuals <- iqr %>%
  rename(x = xmed,
         y = ymed) %>%
  mutate(y = y*-1,
         xmin = x-(xIQR/2),
         xmax = x+(xIQR/2),
         ymin = y-(yIQR/2),
         ymax = y+(yIQR/2))
ggplot(iqr.individuals, aes(fill=direction,color=direction)) +
  annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_minimal() + xlim(0,1440) + ylim(-1080,0) + facet_wrap("direction") +
  ggtitle("with IQRs")

Standard Deviations?

Let’s try using standard deviations instead.

sd <- rawdata %>%
  group_by(participant,media) %>%
  dplyr::summarize(xsd = sd(x,na.rm=TRUE),
                   ysd = sd(y,na.rm=TRUE),
                   xmean = mean(x, na.rm=TRUE),
                   ymean = mean(y, na.rm=TRUE))
sd.individuals <- sd %>%
  rename(x = xmean,
         y = ymean) %>%
  mutate(y = y*-1,
         xmin = x-xsd,
         xmax = x+xsd,
         ymin = y-ysd,
         ymax = y+ysd) %>%
  left_join(subjectinfo,by=c("participant","media")) %>%
  filter(!is.na(maingroup))
ggplot(sd.individuals, aes(fill=direction,color=direction)) +
  annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_minimal() + xlim(0,1440) + ylim(-1080,0) + facet_wrap("direction") +
  ggtitle("with SDs")

Now let’s make Outer Limits charts which is IQR +/- 2 SDs.

sd.individuals <- select(sd.individuals,participant,media,xsd,ysd)
iqrsd.individuals <- left_join(iqr.individuals,sd.individuals,by=c("participant","media")) %>%
  mutate(xmin = xmin-(2*xsd),
         xmax = xmax+(2*xsd),
         ymin = ymin-(2*ysd),
         ymax = ymax+(2*ysd))
ggplot(iqrsd.individuals, aes(fill=direction,color=direction)) +
  annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_minimal() + xlim(0,1440) + ylim(-1080,0) + facet_wrap("direction") +
  ggtitle("with SDs")

iqrsd.individuals <- iqrsd.individuals %>%
  group_by(direction) %>%
  dplyr::summarize(x = mean(x,na.rm=TRUE),
            y = mean(y,na.rm=TRUE),
            xmin = mean(xmin,na.rm=TRUE),
            ymin = mean(ymin,na.rm=TRUE),
            xmax = mean(xmax,na.rm=TRUE),
            ymax = mean(ymax,na.rm=TRUE))
ggplot(iqrsd.individuals, aes(fill=direction,color=direction)) +
  annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_minimal() + xlim(0,1440) + ylim(-1080,0) + facet_wrap("direction") +
  ggtitle("Average of above chart (rain's outer limits)")

XY Space Data - Multiple Plots

First let’s prep the data.

multiples <- rawdata %>%
  filter(!is.na(x)) %>%
  filter(!is.na(y)) %>%
  rename(lang = maingroup,
         name = participant) %>%
  group_by(name, lang, direction, story) %>%
  summarise(xIQR = IQR(x,na.rm=TRUE),
            yIQR = IQR(y,na.rm=TRUE),
            xmed = median(x, na.rm=TRUE),
            ymed = median(y, na.rm=TRUE),
            area = xIQR*yIQR,
            x_90 = quantile(x, .95, na.rm=TRUE) - quantile(x, .05, na.rm=TRUE),
            y_90 = quantile(y, .95, na.rm=TRUE) - quantile(y, .05, na.rm=TRUE),
            area_90 = (x_90) * (y_90),
            x_mean = mean(x, na.rm = TRUE),
            y_mean = mean(y, na.rm = TRUE),
            x_sd = sd(x, na.rm = TRUE),
            y_sd = sd(y, na.rm = TRUE),
            x_1sd = (x_mean+x_sd) - (x_mean-x_sd),
            y_1sd = (y_mean+y_sd) - (y_mean-y_sd),
            area_1sd = x_1sd * y_1sd,
            x_2sd = (x_mean+(x_sd*2)) - (x_mean-(x_sd*2)),
            y_2sd = (y_mean+(y_sd*2)) - (y_mean-(y_sd*2)),
            area_2sd = x_2sd * y_2sd) %>%
  group_by(name, lang, direction) %>%
  summarise_if(is.double, funs(mean), na.rm = T) %>%
  group_by(lang, direction) %>%
  summarise_if(is.double, funs(mean), na.rm = T) %>%
  filter(lang == "DeafEarly" || lang == "HearingNovice")
img <- png::readPNG("cindy.png")
g <- grid::rasterGrob(img, interpolate=TRUE, width=unit(1,"npc"), height=unit(1,"npc")) 

IQR (Middle 50%)

Let’s see.

curr_data <- multiples %>% 
  ungroup() %>%
  select(lang, direction, xmed, ymed, xIQR, yIQR) %>%
  group_by(lang, direction) %>%
  summarise(xmin = xmed-(xIQR/2),
         xmax = xmed+(xIQR/2),
         ymin = -1*(ymed-(yIQR/2)),
         ymax = -1*(ymed+(yIQR/2)))
ggplot(curr_data, aes(fill=direction,color=direction)) +
  annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_linedraw() +
  scale_x_continuous(limits = c(0,1440), expand = c(0, 0)) +
  scale_y_continuous(limits = c(-1080,0), expand = c(0, 0)) +
  facet_wrap("lang")

Middle 90%

So I calculated the average median across, and the middle 90% of the data.

curr_data <- multiples %>% 
  ungroup() %>%
  select(lang, direction, xmed, ymed, x_90, y_90) %>%
  group_by(lang, direction) %>%
  summarise(xmin = xmed-(x_90/2),
         xmax = xmed+(x_90/2),
         ymin = -1*(ymed-(y_90/2)),
         ymax = -1*(ymed+(y_90/2)))
ggplot(curr_data, aes(fill=direction,color=direction)) +
  annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_linedraw() +
  scale_x_continuous(limits = c(0,1440), expand = c(0, 0)) +
  scale_y_continuous(limits = c(-1080,0), expand = c(0, 0)) +
  facet_wrap("lang")

±1 SD (Middle 68%)

So this is using the mean of the means, plus or minus one SD. This is equivalent to middle 68%.

curr_data <- multiples %>% 
  ungroup() %>%
  select(lang, direction, x_mean, y_mean, x_1sd, y_1sd) %>%
  group_by(lang, direction) %>%
  summarise(xmin = x_mean-(x_1sd/2),
         xmax = x_mean+(x_1sd/2),
         ymin = -1*(y_mean-(y_1sd/2)),
         ymax = -1*(y_mean+(y_1sd/2)))
ggplot(curr_data, aes(fill=direction,color=direction)) +
  annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_linedraw() +
  scale_x_continuous(limits = c(0,1440), expand = c(0, 0)) +
  scale_y_continuous(limits = c(-1080,0), expand = c(0, 0)) +
  facet_wrap("lang")

±2 SD (Middle 96%)

And this is using the mean of the means, plus or minus two SD. This is equivalent to middle 96%.

curr_data <- multiples %>% 
  ungroup() %>%
  select(lang, direction, x_mean, y_mean, x_2sd, y_2sd) %>%
  group_by(lang, direction) %>%
  summarise(xmin = x_mean-(x_2sd/2),
         xmax = x_mean+(x_2sd/2),
         ymin = -1*(y_mean-(y_2sd/2)),
         ymax = -1*(y_mean+(y_2sd/2)))
ggplot(curr_data, aes(fill=direction,color=direction)) +
  annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_linedraw() +
  scale_x_continuous(limits = c(0,1440), expand = c(0, 0)) +
  scale_y_continuous(limits = c(-1080,0), expand = c(0, 0)) +
  facet_wrap("lang")

---
title: "Viewing Space (study1adults)"
author: "Adam Stone, PhD" 
date: '`r format(Sys.Date(), "%m-%d-%Y")`'
output:
  github_document:
    toc: yes
    toc_depth: 2
  html_notebook:
    code_folding: hide
    theme: paper
    highlight: tango
    toc: yes
    toc_depth: 2
    toc_float: yes
    df_print: paged
---

# Get Data
We'll load the raw data from all 8 .xls files. Each file shows the participant's name, group, language, media, and x/y eye gaze coordinates. Here's a quick glimpse of the data and how it's structured after I've processed and cleaned it up. 
```{r, message=FALSE, warning=FALSE}
# Import packages we'll need.
library(tidyverse)
library(stringr)
library(lme4)
library(lmerTest)
library(png)
library(grid)

# # Gather up the files
# files <- list.files(pattern = "\\.csv",path="../Adult Data/rawdata")
# files <- str_c("rawdata/",files)
# rawdata <- do.call("rbind", lapply(files, read_csv))
# 
# # Clean Up
# rawdataoriginal <- rawdata
# rawdata <- rawdataoriginal
# rawdata <- rawdata %>%
#   rename(participant = ParticipantName,
#          group = "[Group]Value",
#          language = "[Language]Value",
#          media = MediaName,
#          x = "GazePointX (MCSpx)",
#          y = "GazePointY (MCSpx)",
#          gazeindex = GazePointIndex) %>%
#   select(participant,group,language,media,x,y,gazeindex) %>%
#   add_column(story=NA,direction=NA,maingroup=NA) %>%
#   mutate(story = case_when(
#     str_detect(media,"bears") ~ "bears",
#     str_detect(media,"cinderella") ~ "cinderella",   
#     str_detect(media,"midas") ~ "midas",
#     str_detect(media,"redridinghood") ~ "redridinghood")) %>%
#   mutate(direction = case_when(
#     str_detect(media,"FW") ~ "forward",
#     str_detect(media,"ER") ~ "reversed")) %>%
#   mutate(media = str_c(story,direction, sep="_")) %>%
#   mutate(language = case_when(
#     str_detect(language,"EarlyASL") ~ "Early",
#     str_detect(language,"LateASL") ~ "Late",
#     str_detect(language,"NoviceASL_Trained") ~ "Trained",
#     str_detect(language,"NoviceASL") ~ "Novice")) %>%
#   mutate(maingroup = str_c(group,language, sep="")) %>%
#   filter(!is.na(maingroup))
# glimpse(rawdata)
```

Now that it's in the right format...it's easy to get what we need! :-D 

# Analysis

First, let's trim each participant's data, getting rid of the first 30 samples (0.5 secs). Then we'll get the the mean x and y coordinate for each story for each participant.

```{r}

#write_csv(rawdata,"adultviewingspaceraw.csv")
rawdata <- read_csv("../Adult Data/rawdata/adultviewingspaceraw.csv")

rawdata <- rawdata %>%
  arrange(participant,media,gazeindex) %>%
  group_by(participant,media) %>%
  slice(30:n())

means <- rawdata %>%
  group_by(participant,media) %>%
  summarise(x = mean(x,na.rm=TRUE),
            y = mean(y,na.rm=TRUE))
head(means,10)
```

And I can get x or y plots of one participant across 4 stories. Let's do Adam (me?). We'll set the x and y limits to the whole width of the Tobii monitor (1600x1200). But because Tobii considers (0,0) to be the upper left corner (and not the bottom left corner), we also need to flip the y axis. 

```{r}
adam <- filter(rawdata,participant=="Adam") %>% mutate(y = y*-1)
ggplot(adam,aes(x=x,y=y,color=media)) + geom_point(size=0.1) + geom_path() + facet_wrap("media",ncol=2,nrow=2) + guides(color="none") + scale_x_continuous(limit=c(0,1600)) + scale_y_continuous(limit=c(-1200,0))
```
Cool, yeah? 

Let's try this again but let the x and y limits match the data. That will "zoom" in. We'll also get rid of that weird right-side outlier in RRH.  Neatooooo. 

```{r}
adam <- filter(adam,x<800)
ggplot(adam,aes(x=x,y=y,color=media)) + geom_point(size=0.1) + geom_path() + facet_wrap("media",ncol=2,nrow=2) + guides(color="none") 
```

# IQR 
Now let's get the middle 50% (aka the IQR) of x and y for each participant's story (we've already trimmed the first 30 samples). That should also take care of further weird outliers. And we are defining "viewing space" as the IQR of the x and y axis. 

```{r}
iqr <- rawdata %>%
  group_by(participant,media) %>%
  dplyr::summarize(xIQR = IQR(x,na.rm=TRUE),
                   yIQR = IQR(y,na.rm=TRUE),
                   xmed = median(x, na.rm=TRUE),
                   ymed = median(y, na.rm=TRUE))
head(iqr,10)
```

And check out the histograms:

```{r}
iqr %>% 
  gather(axis,iqr,xIQR:yIQR) %>%
  ggplot(aes(x=iqr,fill=axis)) + geom_histogram() + facet_grid(axis~.)
```

There's one weird outlier for xIQR. That's Rebecca. Let's look at her data. 

```{r}
rebecca <- filter(rawdata,participant=="Rebecca") %>% mutate(y = y*-1)
ggplot(rebecca,aes(x=x,y=y,color=media)) + geom_point(size=0.1) + geom_path() + facet_wrap("media",ncol=2,nrow=2) + guides(color="none") + xlim(0,1440) + ylim(-1080,0)
```

So we'll remove Rebecca's midas_forward story. Next, check the medians.

```{r}
iqr %>% 
  gather(axis,med,xmed:ymed) %>%
  ggplot(aes(x=med,fill=axis)) + geom_histogram() + facet_grid(axis~.)

```

Looks great! But it also looks pretty interesting doesn't it...Y median has a wider spread than X median. That would make sense. 

Now we're ready to do stats based on group, direction, etc. 

```{r}
rbc <- tribble(~participant, ~media, "Rebecca","midas_forward")
iqr <- iqr %>%
  ungroup() %>%
  anti_join(rbc, by=c("participant","media")) # for some reason filter wouldn't work

subjectinfo <- rawdata %>%
  select(participant,maingroup,group,language,media,story,direction) %>%
  distinct() %>%
  filter(!is.na(participant))

iqr <- iqr %>%
  left_join(subjectinfo,by=c("participant","media")) %>%
  filter(!is.na(maingroup))

iqr.gather <- iqr %>% gather(axis,value,xIQR:ymed)
iqr.iqr <- filter(iqr.gather,axis=="xIQR" | axis=="yIQR")
iqr.med <- filter(iqr.gather,axis=="xmed" | axis=="ymed")


ggplot(iqr.iqr,aes(x=maingroup,y=value,fill=direction)) + 
  geom_boxplot() + theme(axis.text.x=element_text(angle=45,hjust=1)) +
  facet_grid(.~axis)
```

And the median x and y position (this assumes all calibrations are correct):

```{r}
ggplot(iqr.med,aes(x=maingroup,y=value,fill=direction)) + 
  geom_boxplot() + theme(axis.text.x=element_text(angle=45,hjust=1)) +
  facet_grid(.~axis)
```


First, does reversal have an effect on X IQR? We have random intercepts for each participant and media, and a random slope adjustment for reversed for each participant. 

```{r}
xiqr.reversal <- lmer(xIQR ~ direction + (direction|participant) + (1|media), data = iqr)
summary(xiqr.reversal)$coefficients
```
 
Welp. No. Y IQR?
```{r}
yiqr.reversal <- lmer(yIQR ~ direction + (direction|participant) + (1|media), data = iqr)
summary(yiqr.reversal)$coefficients
```

No effect here either. Let's try adding maingroups. X IQR first. 
```{r}
xiqr.group <- lmer(xIQR ~ direction * maingroup + (direction|participant) + (1|media), data = iqr)
summary(xiqr.group)$coefficients
```

This means there is a significant difference of DeafEarly vs. HearingLate and HearingTrained on xIQR, but in the forward condition only. Basically, those two groups have a "wider" viewing space than other groups.

```{r}
yiqr.group <- lmer(yIQR ~ direction * maingroup + (direction|participant) + (1|media), data = iqr)
summary(yiqr.group)$coefficients
```
No differences among groups or reversal effect for xIQR. Viewing space doesn't get significantly taller or shorter. 

# Viewing Space Charts
I want to learn how to make rectangle plots so here we go. Using each participant's four x and y medians and 4 x and y IQRs (one set for each story, for 4 stories). So I can get the logic and code down. Let's assume all calibrations were correct. Here's the chart for the whole media size of 1440x1080 (as reported in Tobii). 
```{r}
# In this order, we'll get a grand median by taking a participant's median across their 4 stories, than the median for forward and reverse across all participants. 
medians <- iqr %>%
  group_by(maingroup, participant,direction) %>%
  dplyr::summarize(xIQR = median(xIQR,na.rm=TRUE),
                   yIQR = median(yIQR,na.rm=TRUE),
                   xmed = median(xmed,na.rm=TRUE),
                   ymed = median(ymed,na.rm=TRUE)) %>%
  group_by(maingroup, direction) %>% 
  dplyr::summarize(xIQR = median(xIQR,na.rm=TRUE),
                   yIQR = median(yIQR,na.rm=TRUE),
                   x = median(xmed,na.rm=TRUE),
                   y = median(ymed,na.rm=TRUE))

medians <- medians %>%
  mutate(y = y*-1,
         xmin = x-(xIQR/2),
         xmax = x+(xIQR/2),
         ymin = y-(yIQR/2),
         ymax = y+(yIQR/2))

img <- readPNG("cindy.png")
g <- rasterGrob(img, interpolate=TRUE, width=unit(1,"npc"), height=unit(1,"npc")) 

ggplot(medians, aes(fill=direction,color=direction)) +
  annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_linedraw() + 
  scale_x_continuous(limits = c(0,1440), expand = c(0, 0)) +
  scale_y_continuous(limits = c(-1080,0), expand = c(0, 0)) +
  facet_wrap("maingroup")

# ggplot(iqr.global, aes(fill=direction,color=direction)) +
#   annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
#   geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
#   theme_minimal() + xlim(0,1440) + ylim(-1080,0) +
#   geom_hline(yintercept=-1080+885) +
#   geom_hline(yintercept=-1080+525) + 
#   annotate(geom="text", x = 300, y = -1080+555, label = "upper shoulder point") +
#   annotate(geom="point", x = 535, y = -1080+525) + 
#   annotate(geom="text", x = 535, y = -1080+910, label = "height line") + 
#   annotate(geom="rect", xmin = 535, xmax = 535+365, ymin = -525-551, ymax = -1080+525, fill="maroon", color="black", alpha=0.5) + 
#   annotate(geom="text", x = 700, y = -900, label = "torso")
```

Yayyy! It worked! A bit hard to see! Let's zoom in. 

```{r}
# ggplot(iqr.global, aes(fill=direction,color=direction)) +
#   annotation_custom(g, xmin=0, xmax=1440, ymin=-1080, ymax=0) +
#   geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
#   theme_minimal() + xlim(600,800) + ylim(-500,-300)

ggplot(medians, aes(fill=direction,color=direction)) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_minimal() + xlim(677,743) + ylim(-421,-370)

#ggplot(iqr.global, aes(fill=direction,color=direction)) +
#  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1)
```
I have to figure out how to get Cindy to zoom in - or crop a zoomed in version of Cindy manually and use that.

# Viewing Space Charts for Individuals
Now let's see the variation in viewing spaces for all our individuals. Should be fun.

```{r fig.height=10, fig.width=26}
iqr.individuals <- iqr %>%
  rename(x = xmed,
         y = ymed) %>%
  mutate(y = y*-1,
         xmin = x-(xIQR/2),
         xmax = x+(xIQR/2),
         ymin = y-(yIQR/2),
         ymax = y+(yIQR/2))

ggplot(iqr.individuals, aes(fill=direction,color=direction)) +
  annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_minimal() + xlim(0,1440) + ylim(-1080,0) + facet_wrap("direction") +
  ggtitle("with IQRs")
```

# Standard Deviations?

Let's try using standard deviations instead.

```{r fig.height=10, fig.width=26}
sd <- rawdata %>%
  group_by(participant,media) %>%
  dplyr::summarize(xsd = sd(x,na.rm=TRUE),
                   ysd = sd(y,na.rm=TRUE),
                   xmean = mean(x, na.rm=TRUE),
                   ymean = mean(y, na.rm=TRUE))

sd.individuals <- sd %>%
  rename(x = xmean,
         y = ymean) %>%
  mutate(y = y*-1,
         xmin = x-xsd,
         xmax = x+xsd,
         ymin = y-ysd,
         ymax = y+ysd) %>%
  left_join(subjectinfo,by=c("participant","media")) %>%
  filter(!is.na(maingroup))


ggplot(sd.individuals, aes(fill=direction,color=direction)) +
  annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_minimal() + xlim(0,1440) + ylim(-1080,0) + facet_wrap("direction") +
  ggtitle("with SDs")


```

Now let's make Outer Limits charts which is IQR +/- 2 SDs. 
```{r fig.height=10, fig.width=26}
sd.individuals <- select(sd.individuals,participant,media,xsd,ysd)
iqrsd.individuals <- left_join(iqr.individuals,sd.individuals,by=c("participant","media")) %>%
  mutate(xmin = xmin-(2*xsd),
         xmax = xmax+(2*xsd),
         ymin = ymin-(2*ysd),
         ymax = ymax+(2*ysd))

ggplot(iqrsd.individuals, aes(fill=direction,color=direction)) +
  annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_minimal() + xlim(0,1440) + ylim(-1080,0) + facet_wrap("direction") +
  ggtitle("with SDs")
```

```{r fig.height=10, fig.width=26}
iqrsd.individuals <- iqrsd.individuals %>%
  group_by(direction) %>%
  dplyr::summarize(x = mean(x,na.rm=TRUE),
            y = mean(y,na.rm=TRUE),
            xmin = mean(xmin,na.rm=TRUE),
            ymin = mean(ymin,na.rm=TRUE),
            xmax = mean(xmax,na.rm=TRUE),
            ymax = mean(ymax,na.rm=TRUE))
ggplot(iqrsd.individuals, aes(fill=direction,color=direction)) +
  annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_minimal() + xlim(0,1440) + ylim(-1080,0) + facet_wrap("direction") +
  ggtitle("Average of above chart (rain's outer limits)")

```

# XY Space Data - Multiple Plots

First let's prep the data. 
```{r}
multiples <- rawdata %>%
  filter(!is.na(x)) %>%
  filter(!is.na(y)) %>%
  rename(lang = maingroup,
         name = participant) %>%
  group_by(name, lang, direction, story) %>%
  summarise(xIQR = IQR(x,na.rm=TRUE),
            yIQR = IQR(y,na.rm=TRUE),
            xmed = median(x, na.rm=TRUE),
            ymed = median(y, na.rm=TRUE),
            area = xIQR*yIQR,
            x_90 = quantile(x, .95, na.rm=TRUE) - quantile(x, .05, na.rm=TRUE),
            y_90 = quantile(y, .95, na.rm=TRUE) - quantile(y, .05, na.rm=TRUE),
            area_90 = (x_90) * (y_90),
            x_mean = mean(x, na.rm = TRUE),
            y_mean = mean(y, na.rm = TRUE),
            x_sd = sd(x, na.rm = TRUE),
            y_sd = sd(y, na.rm = TRUE),
            x_1sd = (x_mean+x_sd) - (x_mean-x_sd),
            y_1sd = (y_mean+y_sd) - (y_mean-y_sd),
            area_1sd = x_1sd * y_1sd,
            x_2sd = (x_mean+(x_sd*2)) - (x_mean-(x_sd*2)),
            y_2sd = (y_mean+(y_sd*2)) - (y_mean-(y_sd*2)),
            area_2sd = x_2sd * y_2sd) %>%
  group_by(name, lang, direction) %>%
  summarise_if(is.double, funs(mean), na.rm = T) %>%
  group_by(lang, direction) %>%
  summarise_if(is.double, funs(mean), na.rm = T) %>%
  filter(lang == "DeafEarly" || lang == "HearingNovice")

img <- png::readPNG("cindy.png")
g <- grid::rasterGrob(img, interpolate=TRUE, width=unit(1,"npc"), height=unit(1,"npc")) 

```

## IQR (Middle 50%)
Let's see. 
```{r}
curr_data <- multiples %>% 
  ungroup() %>%
  select(lang, direction, xmed, ymed, xIQR, yIQR) %>%
  group_by(lang, direction) %>%
  summarise(xmin = xmed-(xIQR/2),
         xmax = xmed+(xIQR/2),
         ymin = -1*(ymed-(yIQR/2)),
         ymax = -1*(ymed+(yIQR/2)))

ggplot(curr_data, aes(fill=direction,color=direction)) +
  annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_linedraw() +
  scale_x_continuous(limits = c(0,1440), expand = c(0, 0)) +
  scale_y_continuous(limits = c(-1080,0), expand = c(0, 0)) +
  facet_wrap("lang")
```

## Middle 90%
So I calculated the average median across, and the middle 90% of the data. 
```{r}
curr_data <- multiples %>% 
  ungroup() %>%
  select(lang, direction, xmed, ymed, x_90, y_90) %>%
  group_by(lang, direction) %>%
  summarise(xmin = xmed-(x_90/2),
         xmax = xmed+(x_90/2),
         ymin = -1*(ymed-(y_90/2)),
         ymax = -1*(ymed+(y_90/2)))

ggplot(curr_data, aes(fill=direction,color=direction)) +
  annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_linedraw() +
  scale_x_continuous(limits = c(0,1440), expand = c(0, 0)) +
  scale_y_continuous(limits = c(-1080,0), expand = c(0, 0)) +
  facet_wrap("lang")
```


## ±1 SD (Middle 68%)
So this is using the mean of the means, plus or minus one SD.  This is equivalent to middle 68%. 
```{r}
curr_data <- multiples %>% 
  ungroup() %>%
  select(lang, direction, x_mean, y_mean, x_1sd, y_1sd) %>%
  group_by(lang, direction) %>%
  summarise(xmin = x_mean-(x_1sd/2),
         xmax = x_mean+(x_1sd/2),
         ymin = -1*(y_mean-(y_1sd/2)),
         ymax = -1*(y_mean+(y_1sd/2)))

ggplot(curr_data, aes(fill=direction,color=direction)) +
  annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_linedraw() +
  scale_x_continuous(limits = c(0,1440), expand = c(0, 0)) +
  scale_y_continuous(limits = c(-1080,0), expand = c(0, 0)) +
  facet_wrap("lang")
```

## ±2 SD (Middle 96%)
And this is using the mean of the means, plus or minus two SD.  This is equivalent to middle 96%. 
```{r}
curr_data <- multiples %>% 
  ungroup() %>%
  select(lang, direction, x_mean, y_mean, x_2sd, y_2sd) %>%
  group_by(lang, direction) %>%
  summarise(xmin = x_mean-(x_2sd/2),
         xmax = x_mean+(x_2sd/2),
         ymin = -1*(y_mean-(y_2sd/2)),
         ymax = -1*(y_mean+(y_2sd/2)))

ggplot(curr_data, aes(fill=direction,color=direction)) +
  annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_linedraw() +
  scale_x_continuous(limits = c(0,1440), expand = c(0, 0)) +
  scale_y_continuous(limits = c(-1080,0), expand = c(0, 0)) +
  facet_wrap("lang")
```
